#ifndef _FEATURE_EXTRACTER_
#define _FEATURE_EXTRACTER_

#pragma once

#include <cassert>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <string>
#include <list>
#include <map>
using namespace std;

#include "Instance.h"
#include "common.h"

#include "NRMat.h"
using namespace nr;

#include "FVec.h"
#include "FeatureDictionary.h"
using namespace egstra;

namespace dparser {
	/**
	 * Feature generator for POS tagging.
	 *
	 * Owns the word / tag / feature dictionaries, maps tag strings to integer
	 * ids and back, and declares the routines that build feature vectors for
	 * an Instance. Three tag inventories are kept side by side: a "joint" one
	 * and two separate ones named "a" and "b".
	 */
	class FGen
	{
	private:
		// Word string -> id (see get_word_id()).
		FeatureDictionary _word_dict;

		// Tag string <-> id tables for the joint, "a" and "b" tag sets.
		// The _id_2_str_* tables back pos_id_2_str*(); they are filled
		// outside this header.
		FeatureDictionary _dict_joint;
		NRVec<const char *> _id_2_str_joint;
		FeatureDictionary _dict_a;
		NRVec<const char *> _id_2_str_a;
		FeatureDictionary _dict_b;
		NRVec<const char *> _id_2_str_b;

		// Feature-string dictionaries, one per feature family.
		FeatureDictionary _dict_unigram;
		FeatureDictionary _dict_bigram_joint;
		FeatureDictionary _dict_bigram_a;
		FeatureDictionary _dict_bigram_b;

		// Sizes of the feature families.
		// NOTE(review): computed outside this header -- confirm exact meaning there.
		int _dim_unigram;
		int _dim_bigram_joint;
		int _dim_bigram_a;
		int _dim_bigram_b;

		// Offsets of each feature family.
		// NOTE(review): presumably start positions inside one global feature
		// space of size _total_feature_dim -- confirm in the implementation.
		int _offset_unigram_joint;
		int _offset_unigram_a;
		int _offset_unigram_b;
		int _offset_bigram_joint;
		int _offset_bigram_a;
		int _offset_bigram_b;
		
		// Value reported by feature_dimentionality().
		int _total_feature_dim;

	private:
		string _name;
		// Passed as the second argument of FeatureDictionary::getFeature() by
		// the get_*_id() lookups; toggled by start/stop_generation_mode().
		bool _generation_mode;

	private: // options
		int _fcutoff; // only use features with freq >= _fcutoff 
		bool _english;

		bool _use_joint_features;
		bool _use_separate_features;

		bool _use_guide_postag;
		bool _use_guide_postag_prob;

	public:
		/**
		 * Builds an empty generator with generation mode off.
		 *
		 * Every scalar member gets a defined value here, including the option
		 * flags, so that none of them is ever read while indeterminate.
		 * NOTE(review): the option defaults below are neutral placeholders;
		 * process_options() is presumably what assigns the real values.
		 */
		FGen()
			: _dim_unigram(0),
			  _dim_bigram_joint(0),
			  _dim_bigram_a(0),
			  _dim_bigram_b(0),
			  _offset_unigram_joint(0),
			  _offset_unigram_a(0),
			  _offset_unigram_b(0),
			  _offset_bigram_joint(0),
			  _offset_bigram_a(0),
			  _offset_bigram_b(0),
			  _total_feature_dim(0),
			  _name("FGen"),
			  _generation_mode(false),
			  _fcutoff(0),
			  _english(false),
			  _use_joint_features(false),
			  _use_separate_features(false),
			  _use_guide_postag(false),
			  _use_guide_postag_prob(false)
		{}

		~FGen() {}
/*		void add_feature_frequency(const int freq) {
			_pos_feat_dict.add_frequency(freq);
		}
*/
		void process_options();
		void start_generation_mode() { _generation_mode = true; }
		void stop_generation_mode() { _generation_mode = false; }

		void dealloc_fvec_prob(Instance * const inst) const;
		void create_all_feature_vectors(Instance * const inst);
		void create_all_pos_features_according_to_tree(const Instance * const inst, sparsevec &sp_fv, const vector<string> &cpostags, const double scale = 1.0);
		void create_all_pos_features_when_create_dict(const Instance * const inst, const bool collect_word);


		/// Returns the total feature dimension (historical, misspelled name kept for callers).
		int feature_dimentionality() const {
			return _total_feature_dim;
		}
		/// Correctly spelled alias of feature_dimentionality().
		int feature_dimensionality() const {
			return _total_feature_dim;
		}
		/// Number of entries in the joint tag dictionary.
		int tag_number() const {
			return _dict_joint.dimensionality();
		}
		/// Number of entries in the "a" tag dictionary.
		int tag_number_a() const {
			return _dict_a.dimensionality();
		}
		/// Number of entries in the "b" tag dictionary.
		int tag_number_b() const {
			return _dict_b.dimensionality();
		}
		void collect_word_postag( Instance * const inst, const bool collect_word=false); // when creating dictionaries,  collect word/postag

		/// Looks up `word` in the word dictionary; the raw result is returned
		/// unchecked, so callers must handle whatever getFeature() yields for
		/// unknown words.
		int get_word_id(const string &word) {
			return _word_dict.getFeature(word, _generation_mode);
		}
		/// Looks up a joint tag; prints a message and terminates the process
		/// with exit(-1) when the dictionary returns a negative id.
		int get_pos_id(const string &pos) {
			const int id = _dict_joint.getFeature(pos, _generation_mode);
			if (id < 0) {
				cerr << "unknown pos type (joint): " << pos << endl;
				exit(-1);
			}
			return id;
		}
		/// Same as get_pos_id(), but against the "a" tag dictionary.
		int get_pos_id_a(const string &pos) {
			const int id = _dict_a.getFeature(pos, _generation_mode);
			if (id < 0) {
				cerr << "unknown pos type (a): " << pos << endl;
				exit(-1);
			}
			return id;
		}
		/// Same as get_pos_id(), but against the "b" tag dictionary.
		int get_pos_id_b(const string &pos) {
			const int id = _dict_b.getFeature(pos, _generation_mode);
			if (id < 0) {
				cerr << "unknown pos type (b): " << pos << endl;
				exit(-1);
			}
			return id;
		}

		//int pos_id_dummy_joint() {
		//	return get_pos_id(DUMMY_CPOSTAG);
		//}
		//int pos_id_dummy_a() {
		//	return get_pos_id_a(DUMMY_CPOSTAG_SEP);
		//}
		//int pos_id_dummy_b() {
		//	return get_pos_id_b(DUMMY_CPOSTAG_SEP);
		//}

		/// Maps a joint tag id back to its string; the id range is only
		/// checked by assert, i.e. not at all when NDEBUG is defined.
		const char *pos_id_2_str(const int pos_id) const {
			assert(pos_id >= 0 && pos_id < _id_2_str_joint.size());
			return _id_2_str_joint[pos_id];
		}
		/// Maps an "a" tag id back to its string (assert-checked range).
		const char *pos_id_2_str_a(const int pos_id) const {
			assert(pos_id >= 0 && pos_id < _id_2_str_a.size());
			return _id_2_str_a[pos_id];
		}
		/// Maps a "b" tag id back to its string (assert-checked range).
		const char *pos_id_2_str_b(const int pos_id) const {
			assert(pos_id >= 0 && pos_id < _id_2_str_b.size());
			return _id_2_str_b[pos_id];
		}

		void save_dictionaries(const string &dictdir) /*const*/;
		void load_dictionaries(const string &dictdir);

		/// Fills inst->predicted_tags_joint with the tag string of every id in
		/// inst->predicted_tagids, one entry per position of the instance.
		void assign_predicted_tag_str(Instance * const inst) {
			const int len = inst->size();
			inst->predicted_tags_joint.resize(len);
			for (int i = 0; i < len; ++i) {
				inst->predicted_tags_joint[i] = pos_id_2_str(inst->predicted_tagids[i]);
			}
		}
		/// Disabled: trips an assertion when called. The former implementation
		/// is kept below for reference only.
		void assign_filtered_tag_str(Instance * const inst) {
			(void)inst; // unused while the body is disabled
			assert (false);
			/*
			const int len = inst->size();
			inst->filtered_tags.resize(len);
			for (int i = 0; i < len; ++i) {
				const vector<int> &ids = inst->filtered_tags_id[i];
				const int tagnum = ids.size();
				inst->filtered_tags[i].resize(tagnum);
				for (int j = 0; j < tagnum; ++j)
					inst->filtered_tags[i][j] = pos_id_2_str(ids[j]);
			}
			*/
		}
		
		/// Builds the boolean matrix inst->constrained_tags (positions x joint
		/// tags) from inst->constrained_tags_str: a position with an empty
		/// string list allows every tag, otherwise only the listed tags.
		void create_constrained_tag_matrix(Instance * const inst) {
			assert(!inst->constrained_tags_str.empty());
			// Position 0 is always restricted to exactly inst->cpostags[0].
			inst->constrained_tags_str[0].clear();
			inst->constrained_tags_str[0].push_back(inst->cpostags[0]);

			const int len = inst->size();
			// Read the tag count once: the matrix is sized with this value, and
			// get_pos_id() below may grow the dictionary in generation mode, so
			// the "allow all" loop must not run past the allocated width.
			const int tag_num = tag_number();
			inst->constrained_tags.resize(len, tag_num);
			inst->constrained_tags = false;
			for (int i = 0; i < len; ++i) {
				const vector<string> &tags = inst->constrained_tags_str[i];
				if (tags.empty()) {
					for (int ti = 0; ti < tag_num; ++ti) {
						inst->constrained_tags[i][ti] = true;
					}
				} else {
					// Unsigned index to match vector::size() and avoid a
					// signed/unsigned comparison.
					for (vector<string>::size_type ti = 0; ti < tags.size(); ++ti) {
						inst->constrained_tags[i][ get_pos_id(tags[ti]) ] = true;
					}
				}
			}
		}
		
	private:
		void addPOSFeature_unigram(const Instance *inst, const int node_id, list<string> &feats_str, bool use_guide_pos = false) const;
		void addPOSFeature_unigram_guide(const Instance *inst, const int node_id, list<string> &feats_str, list<double> &probs) const;
		void addPOSFeature_bigram( const Instance *inst, const int node_id, const string &cpostag_L1, list<string> &feats_str ) const;

		void usage(const char * const mesg) const;
	};
} // namespace dparser

#endif


